* This do-file builds the candidate identifiers linked across elections, then cleans and appends electoral results at the candidate level.

****** Unique candidate identifier ******

// Candidate-link files that were updated and provided to us while the project was ongoing:
// stack the two files and keep only the candidate / previous-candidate identifiers and names
// together with the election keys.
use "$raw_data/elections/cand_across_elections_samedis", clear
append using "$raw_data/elections/cand_across_elections_samedis_others"

local link_vars id_unique_cand last_name_cand first_name_cand ///
	id_unique_cand_prev last_name_cand_prev first_name_cand_prev ///
	election_type id_district year
keep `link_vars'

save "$intermediate/dataset_cand_id_prev", replace

// Build the forward links: each row "candidate -> previous candidate" is turned into a row
// keyed on the previous candidate's id, carrying the current candidate as its "next" one.
// The copy is stored as double: a bare -generate- creates a float, which represents integers
// exactly only up to 16,777,216, and this id is later used as a merge key.
gen double id_unique_cand_next=id_unique_cand
replace id_unique_cand=id_unique_cand_prev

// Rows without a previous-candidate id carry no link
keep if id_unique_cand!=.

// Everything that describes the current row now describes the "next" candidate
foreach v in year election_type last_name_cand first_name_cand id_district {
	rename `v' `v'_next
}
keep *_next id_unique_cand

save "$temp/temp", replace

// Attach the forward links saved in the temp file to the backward-link file
use "$intermediate/dataset_cand_id_prev", clear
merge 1:1 id_unique_cand using "$temp/temp"

// Keep the ids and names of the candidate, of the previous and of the next candidate
// (_merge is not in the list, so it is discarded here)
keep id_unique_cand id_unique_cand_prev id_unique_cand_next ///
	last_name_cand last_name_cand_prev last_name_cand_next ///
	first_name_cand first_name_cand_prev first_name_cand_next

// Compare to initial file provided to us at the beginning of the project:
// every variable gets the suffix _temp so that it can sit next to its counterpart in that file
rename * *_temp
save "$temp/temp", replace

use "$raw_data/elections/dataset_cand_id_across_elections", clear //initial file

// Merge key named like the key of the _temp file. Stored as double so that the copy is exact
// whatever the storage type of id_unique_cand (a bare -generate- would create a float, which is
// exact only up to 16,777,216).
gen double id_unique_cand_temp=id_unique_cand

merge 1:1 id_unique_cand_temp using "$temp/temp"
// NOTE(review): rows present only in the updated files (_merge==2) have a missing id_unique_cand
// and are kept as is below - confirm that none are expected.

// Matched candidates whose previous / next link exists in the updated files but not in the initial file
li last_name_cand_prev_temp first_name_cand_prev_temp last_name_cand first_name_cand id_unique_cand_prev_temp id_unique_cand if _merge==3 & id_unique_cand_prev==. & id_unique_cand_prev_temp!=.

li last_name_cand_next_temp first_name_cand_next_temp last_name_cand first_name_cand id_unique_cand_next_temp id_unique_cand if _merge==3 & id_unique_cand_next==. & id_unique_cand_next_temp!=.

// The reverse must never happen: no link of the initial file may be absent from the updated files
assert !(id_unique_cand_prev_temp==. & id_unique_cand_prev!=.)
assert !(id_unique_cand_next_temp==. & id_unique_cand_next!=.)

// Fill the links missing from the initial file with those of the updated files
replace id_unique_cand_prev=id_unique_cand_prev_temp if id_unique_cand_prev==. & id_unique_cand_prev_temp!=.
replace id_unique_cand_next=id_unique_cand_next_temp if id_unique_cand_next==. & id_unique_cand_next_temp!=.

drop _merge *_temp

save "$intermediate/dataset_cand_id_across_elections_new", replace

****** Electoral results at candidate level ******

// Stack the two election-results files
use "$raw_data/elections/leg_database_redistrict", clear
append using "$raw_data/elections/cant_database_redistrict"

// Reshape to have one observation per candidate.
// First move the candidate number to the end of the variable name
// (e.g. nb_votes_cand3_R1 -> nb_votes_cand_R13) so that it can serve as the reshape index.
foreach stub in nb_votes prop_registered_votes prop_voters_votes {
	forvalues c=1/48 {
		forvalues r=1/2 {
			rename `stub'_cand`c'_R`r' `stub'_cand_R`r'`c'
		}
	}
}

local cand_stubs last_name_cand first_name_cand gender_cand political_label_cand ///
	nb_votes_cand_R1 nb_votes_cand_R2 ///
	prop_registered_votes_cand_R1 prop_registered_votes_cand_R2 ///
	prop_voters_votes_cand_R1 prop_voters_votes_cand_R2
reshape long `cand_stubs', i(id_unique) j(candidate)

// Drop the rows without a first-round vote count
drop if nb_votes_cand_R1==.

// Ranking of each candidate within id_unique, separately for each round:
// candidates are ordered by decreasing number of votes (ties are ordered by last name, first name
// and political label); the ranking is left missing when the vote count of the round is missing.
foreach r of numlist 1/2 {
	gsort id_unique -nb_votes_cand_R`r' last_name_cand first_name_cand political_label_cand
	bysort id_unique: gen ranking_cand_R`r' = _n if nb_votes_cand_R`r'!=.
	label var ranking_cand_R`r' "candidate's ranking, round `r'"
}

// Correct the ranking of candidates who are tied (ex aequo) in the 2nd round.
// List the pairs of candidates with the same number of 2nd-round votes within id_unique
bysort id_unique nb_votes_cand_R2: gen n_tied=_N
list id_unique last_name_cand ranking_cand_R2 nb_votes_cand_R2 political_label_cand if nb_votes_cand_R2!=. & n_tied==2
drop n_tied

// NB: rule: highest ranking to the older candidate
replace ranking_cand_R2=1 if id_unique==2061032015 & political_label_cand=="UDI"
replace ranking_cand_R2=2 if id_unique==2061032015 & political_label_cand=="SOC"

replace ranking_cand_R2=1 if id_unique==2005172004 & political_label_cand=="SOC"
replace ranking_cand_R2=2 if id_unique==2005172004 & political_label_cand=="UMP"

replace ranking_cand_R2=1 if id_unique==2006142001 & political_label_cand=="SOC"
replace ranking_cand_R2=2 if id_unique==2006142001 & political_label_cand=="DL"

replace ranking_cand_R2=1 if id_unique==2019341985 & political_label_cand=="DVD" & last_name_cand=="BOURZAT"
replace ranking_cand_R2=2 if id_unique==2019341985 & political_label_cand=="SOC"

replace ranking_cand_R2=1 if id_unique==2038151979 & political_label_cand=="UDF"
replace ranking_cand_R2=2 if id_unique==2038151979 & political_label_cand=="SOC"

replace ranking_cand_R2=1 if id_unique==2043212008 & political_label_cand=="DVD"
replace ranking_cand_R2=2 if id_unique==2043212008 & political_label_cand=="UMP"

// In the next two elections the corrected pair is assigned ranks 2 and 3
replace ranking_cand_R2=2 if id_unique==2061061998 & political_label_cand=="UDF" & last_name_cand=="GENG"
replace ranking_cand_R2=3 if id_unique==2061061998 & political_label_cand=="SOC"

replace ranking_cand_R2=3 if id_unique==2071041994 & political_label_cand=="DVD" & last_name_cand=="JUILLET"
replace ranking_cand_R2=2 if id_unique==2071041994 & political_label_cand=="SOC"

replace ranking_cand_R2=1 if id_unique==2071111992 & last_name_cand=="COLLAUDIN"
replace ranking_cand_R2=2 if id_unique==2071111992 & last_name_cand=="FEUILLET"

replace ranking_cand_R2=1 if id_unique==2202051979 & political_label_cand=="RDG"
replace ranking_cand_R2=2 if id_unique==2202051979 & political_label_cand=="RPR"

// New adjustment due to update with DOM TOM
replace ranking_cand_R2=1 if id_unique==2972221979 & political_label_cand=="DMF" & last_name_cand=="MAURICE"
replace ranking_cand_R2=2 if id_unique==2972221979 & political_label_cand=="RPR" & last_name_cand=="MORIN"

// NB: we write down every replacement to keep a trace of the rule, even though some of these rankings happened to be correctly assigned in the first place
// We leave aside 2 ex aequo pairs from the 2015 local elections, which we won't use in the analysis

// Merge with the political orientation + parties/coalitions classified manually
// ("NA" labels are recoded to the empty string before being used in the merge key)
replace political_label_cand="" if political_label_cand=="NA"
merge m:1 year political_label_cand using "$raw_data/elections/political_orientations_wparty_all"

// Labels of our data not found in the classification (_merge==1) and classified labels not found in our data (_merge==2).
// The variable name is spelled out in full so that the check does not depend on variable abbreviation.
tab political_label_cand if _merge==1
tab year political_label_cand if _merge==2
// NB: some labels do not merge because our dataset does not include 1976 and one label is missing for 1962 (IND), but to avoid re-doing the party classification on a different list of labels we stick to this original list

// The following block is commented out as we needed to run it only once to get the information required for party classification
/** Outsheet list of nuances with number of candidates and vote shares for party classification
byso id_unique: gen temp=_n
byso election_type year: egen temp2=total(nb_registered_R1) if temp==1 
byso election_type year: egen tot_registered=min(temp2) //total number of registered per election year
drop temp*
byso election_type year political_label_cand: egen tot_vot=total(nb_votes_cand_R1) //total number of votes by nuance
gen tot_votesh=tot_vot/tot_registered
sum tot_votesh

byso election_type year: gen temp=_N if _merge!=2 //total number of candidates
byso election_type year political_label_cand: gen temp2=_N if _merge!=2  //number of candidates from nuance
gen tot_candsh=temp2/temp
sum tot_candsh
drop temp*

byso year political_label_cand (election_type): gen temp=_n //note that in years with two election types we keep only one obs. per label, so the indicated vote share and candidate share is valid for one election type only (but indicative of the overall party strength in that year)
so year political_label_cand
li year political_label_cand political_orientation_cand party_cand tot_candsh tot_votesh if temp==1 & political_label_cand!=""
outsheet year political_label_cand political_orientation_cand party_cand tot_candsh tot_votesh if temp==1 & political_label_cand!="" using "$intermediate/political_orientations_wparty_wsize.xls", replace
*/

drop if _merge==2 //drops nuances we do not use
drop _merge
order political_orientation_cand party_cand party_name coalition1 coalition2 coalition, after(political_label_cand)

// Check that (id_unique, ranking_cand_R1) uniquely identifies the observations
byso id_unique ranking_cand_R1: gen temp=_N
ta temp //unique identifier
assert temp==1
drop temp

// Candidate identifier = row number in the (id_unique, ranking_cand_R1) order.
// Stored as long: the default float type represents integers exactly only up to 16,777,216.
so id_unique ranking_cand_R1
gen long id_unique_cand=_n

label var id_unique_cand "candidate identifier"

save "$intermediate/initial_dataset_cand_lvl", replace

